#!/usr/bin/env python
# -*- coding:utf-8 -*-
# @FileName  :file_demo.py
# @Time      :2023/6/30 
# @Author    :CL
# @email     :1037654919@qq.com

import json
import pandas as pd
import os
import shutil

def readname(filePath):
    """Return the names of the entries found directly inside ``filePath``.

    Args:
        filePath: Directory to list, e.g. ``'pinterest/'``.

    Returns:
        The list produced by ``os.listdir`` (bare names, arbitrary order).
    """
    return os.listdir(filePath)

def clean_data(filePath):
    """Remove every sub-folder of ``filePath`` that holds two or fewer entries.

    Intended workflow (from the original author's note): first search the
    data folder for jpg files and batch-delete the unwanted images by hand,
    then run this function to drop the folders that are left with fewer
    than three files.

    Every removed folder increments the module-level ``clean_count``
    counter; the counter is created on first use when the caller has not
    initialised it.

    Args:
        filePath: Directory whose immediate sub-folders are inspected.
            A trailing path separator is optional.
    """
    global clean_count

    print('开始清洗：', filePath)
    for entry in os.listdir(filePath):
        folder = os.path.join(filePath, entry)
        # A stray file next to the sub-folders cannot be listed; skip it
        # instead of crashing with NotADirectoryError.
        if not os.path.isdir(folder):
            continue

        contents = os.listdir(folder)
        if len(contents) > 2:
            continue

        shutil.rmtree(folder)
        print(entry, len(contents))
        # Tolerate callers that never set the counter, so the function does
        # not fail with NameError after the folder is already gone.
        clean_count = globals().get('clean_count', 0) + 1


def delete_wrong_image(path, *, invalid_size=79):
    """Delete invalid ``.jpg`` files in ``path`` together with their ``.txt`` files.

    A ``.jpg`` file is treated as invalid when its size is exactly
    ``invalid_size`` bytes (79 by default; presumably the size of a
    failed-download placeholder -- confirm against the downloader).  For
    each such image the same-named ``.txt`` file is removed as well.

    Every invalid image found increments the module-level ``delete_count``
    counter; the counter is created on first use when the caller has not
    initialised it.

    Args:
        path: Directory to scan (not recursive).  A trailing path
            separator is optional.
        invalid_size: Exact byte size that marks an image as invalid.
    """
    global delete_count

    for entry in os.listdir(path):
        # Match on the extension only, so names that merely contain
        # ".jpg" somewhere in the middle are left alone.
        if not entry.endswith('.jpg'):
            continue

        image_path = os.path.join(path, entry)
        size = os.stat(image_path).st_size
        if size != invalid_size:
            continue

        delete_count = globals().get('delete_count', 0) + 1
        print(size, entry)

        txt_path = os.path.join(path, os.path.splitext(entry)[0] + '.txt')
        for target in (txt_path, image_path):
            try:
                os.remove(target)
            except OSError:
                # Best effort: the .txt file may not exist, and a file that
                # cannot be removed must not abort the rest of the scan.
                pass

def count_file(filePath):
    """Return how many entries sit directly inside ``filePath``.

    Args:
        filePath: Directory to inspect, e.g. ``'pinterest/'``.

    Returns:
        Number of names reported by ``os.listdir`` (files and folders alike).
    """
    return len(os.listdir(filePath))


if __name__ == "__main__":
    # Root of the data set.  The loops below walk it as
    # root/<group>/<item>/<files>.
    root = '/home/chenglei3/work/2023.07/AI_car_picture/car body  design/car_body_mondb/'

    name = readname(root)
    print(name)

    # Module-level counters updated by clean_data / delete_wrong_image.
    clean_count = 0
    count = 0
    delete_count = 0

    for na in name:
        # The empty last component keeps the trailing separator, so the
        # path still works when a callee appends file names by plain
        # string concatenation.
        filepath = os.path.join(root, na, '')
        if not os.path.isdir(filepath):
            # Stray file at the top level: nothing to clean.
            continue

        # Pass 1: drop invalid images (and their .txt files) in every item.
        for na2 in os.listdir(filepath):
            path = os.path.join(filepath, na2, '')
            if os.path.isdir(path):
                delete_wrong_image(path)

        # Pass 2: drop item folders left with too few files.
        clean_data(filepath)

        # Pass 3: re-list (folders may have been removed) and tally what is
        # left.  (entries - 1) / 2 presumably counts jpg/txt pairs with one
        # extra file per folder -- TODO confirm the folder layout.
        for na2 in os.listdir(filepath):
            path = os.path.join(filepath, na2, '')
            if os.path.isdir(path):
                count += (len(os.listdir(path)) - 1) / 2

    print('delete_count:', delete_count)
    print('clean_count:', clean_count)
    print('count:', count)



